dir = 'results/pangenome/Bacteroides_xylanisolvens_outgroup'
system(paste0('mkdir -pv ',file.path(dir,'analyses')))

Bxylan analyses

3 Bovatus genomes included in roary run

Raxml with 100 bootstraps run on tree

Reformat metadata

metadata <- read.table('metadata/strain_ncbi_metadata_assembly_all.txt',header=T)
metadata <- metadata %>%
  mutate(taxonomy_Species = str_replace_all(taxonomy_Species,' ','_'),
         isolate = str_replace_all(isolate,'[_-]','.'),
         site = recode(site,'USA: Cambridge'='USA','USA:Boston'='USA',
                       'China: Shenzhen'='China','USA:Seattle'='USA',     
                       'USA:Baltimore'='USA', 'not applicable'='siteUnknown',
                       'missing'='siteUnknown')) %>%
  unite(host_site, host, site, sep = "_", remove = FALSE) 

Filter out Bxy assemblies low quality

#Bxy roary was run with exclude lower quality assemblies
#completeness and contamination associated with genome size and # of predicted genes
Bxy_metadata <- metadata %>% filter(taxonomy_Species == 'Bacteroides_xylanisolvens')
summary(lm_robust(Genome.size ~ Contamination + Completeness, data=Bxy_metadata))
## 
## Call:
## lm_robust(formula = Genome.size ~ Contamination + Completeness, 
##     data = Bxy_metadata)
## 
## Standard error type:  HC2 
## 
## Coefficients:
##               Estimate Std. Error t value  Pr(>|t|) CI Lower CI Upper  DF
## (Intercept)     513870    1341840   0.383 7.024e-01 -2141190  3168930 128
## Contamination    46545       2377  19.583 2.538e-40    41842    51248 128
## Completeness     58326      13597   4.290 3.498e-05    31422    85231 128
## 
## Multiple R-squared:  0.3116 ,    Adjusted R-squared:  0.3008 
## F-statistic: 220.2 on 2 and 128 DF,  p-value: < 2.2e-16
summary(lm_robust(X..predicted.genes ~ Contamination+ Completeness, data=Bxy_metadata))
## 
## Call:
## lm_robust(formula = X..predicted.genes ~ Contamination + Completeness, 
##     data = Bxy_metadata)
## 
## Standard error type:  HC2 
## 
## Coefficients:
##               Estimate Std. Error t value  Pr(>|t|) CI Lower  CI Upper  DF
## (Intercept)    8676.61   2046.114   4.241 4.239e-05  4628.02 12725.191 128
## Contamination    56.39      8.462   6.663 7.184e-10    39.64    73.129 128
## Completeness    -35.46     20.670  -1.716 8.864e-02   -76.36     5.435 128
## 
## Multiple R-squared:  0.2168 ,    Adjusted R-squared:  0.2046 
## F-statistic: 24.99 on 2 and 128 DF,  p-value: 6.874e-10
#apply qc filter
Bxy_metadata_filtered <- metadata %>% 
  filter(taxonomy_Species == 'Bacteroides_xylanisolvens') %>%
  filter(Completeness>94&Contamination<5)
summary(lm_robust(Genome.size ~ Contamination + Completeness, data=Bxy_metadata_filtered))
## 
## Call:
## lm_robust(formula = Genome.size ~ Contamination + Completeness, 
##     data = Bxy_metadata_filtered)
## 
## Standard error type:  HC2 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|) CI Lower CI Upper  DF
## (Intercept)    2062548    2037963   1.012  0.31353 -1972137  6097232 121
## Contamination    44173      42699   1.035  0.30295   -40361   128708 121
## Completeness     42709      20542   2.079  0.03972     2041    83378 121
## 
## Multiple R-squared:  0.01709 ,   Adjusted R-squared:  0.0008399 
## F-statistic: 2.161 on 2 and 121 DF,  p-value: 0.1196
summary(lm_robust(X..predicted.genes ~ Contamination + Completeness, data=Bxy_metadata_filtered))
## 
## Call:
## lm_robust(formula = X..predicted.genes ~ Contamination + Completeness, 
##     data = Bxy_metadata_filtered)
## 
## Standard error type:  HC2 
## 
## Coefficients:
##               Estimate Std. Error t value  Pr(>|t|) CI Lower CI Upper  DF
## (Intercept)   23531.52    3780.23   6.225 7.199e-09  16047.6 31015.47 121
## Contamination   -19.03      58.73  -0.324 7.465e-01   -135.3    97.24 121
## Completeness   -184.88      38.04  -4.860 3.564e-06   -260.2  -109.56 121
## 
## Multiple R-squared:  0.1735 ,    Adjusted R-squared:  0.1598 
## F-statistic: 14.11 on 2 and 121 DF,  p-value: 3.096e-06
#completeness is negatively associated with # of predicted genes, s 
ggplot(Bxy_metadata_filtered,aes(x=Completeness,y=X..predicted.genes)) + 
  geom_point() + theme_bw()

Define samples to exclude

samples_to_exclude <- metadata %>% 
  filter(Completeness<94|Contamination>5)
samples_to_exclude <- samples_to_exclude$isolate
metadata <- metadata %>% 
  filter(Completeness>94&Contamination<5)

Root tree and subset metadata

Bxy_tree_file <- "phylogeny/RAxML_bipartitions.Bacteroides_xylanisolvens_outgroup"
Bxy_tree <- ape::read.tree(file.path(dir,Bxy_tree_file))
Bxy_tree$tip.label <- str_replace_all(Bxy_tree$tip.label,'[_-]','.') #clean up names
Bxy_tree <- drop.tip(Bxy_tree,samples_to_exclude) #remove low QC assemblies
setdiff(Bxy_tree$tip.label,metadata$isolate)
## character(0)
Bxy_metadata <- metadata %>% filter(isolate %in% Bxy_tree$tip.label)
write.table(Bxy_metadata,file=file.path(dir,'analyses/Bxy_metadata.txt'))

ggtree(Bxy_tree) + geom_nodelab()
## Warning: `data_frame()` is deprecated as of tibble 1.1.0.
## Please use `tibble()` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.
## Warning: `mutate_()` is deprecated as of dplyr 0.7.0.
## Please use `mutate()` instead.
## See vignette('programming') for more help
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.

Bov <- metadata %>% filter(taxonomy_Species == 'Bacteroides_ovatus') 
Bov_outgroup <- intersect(Bxy_tree$tip.label,Bov$isolate)
Bxy_tree <- root(Bxy_tree,outgroup=Bov_outgroup) 

get_color_palette <- function(tips) {
  Bxy_metadata <- Bxy_metadata %>% filter(isolate %in% tips)
  vec <- sort(unique(Bxy_metadata$host_site))
  return(recode(vec,
                          'human_USA'='blue3',
                          'human_China' = 'cyan',
                          'bonobo_Columbus_Zoo'='red2',
                          'chimpanzee_Houston_Zoo'='orange2',
                          'orangutan_Houston_Zoo'='purple4',
                          'gorilla_Columbus_Zoo'='green3',
                          'chicken_siteUnknown'='tan',
                          'missing_siteUnknown'='brown4'))}
host_site_color <- get_color_palette(Bxy_tree$tip.label)

Bxy_tree_plot <- ggtree(Bxy_tree) %<+% Bxy_metadata +
  geom_nodepoint(aes(subset = suppressWarnings(as.numeric(label)) > 50),size=.75) +
  geom_tippoint(aes(color=host_site), alpha=0.8)  + 
  scale_colour_manual(values=get_color_palette(Bxy_tree$tip.label)) + 
  geom_tiplab() + 
  xlim(NA,.5)
Bxy_tree_plot

write.tree(Bxy_tree,file=file.path(dir,'analyses/Bxy.tre'))
ggsave(Bxy_tree_plot,file=file.path(dir,'analyses/Bxy_phylogeny.pdf'),height=15,width=8)

Add clades to tree and metadata

cladeA_MCRA <- ape::getMRCA(Bxy_tree,c('GCA.009102525.1.ASM910252v1','GCA.009093695.1.ASM909369v1'))
cladeA_tree <- extract.clade(Bxy_tree,cladeA_MCRA)
ggtree(cladeA_tree) %<+% metadata +
  geom_nodepoint(aes(subset = suppressWarnings(as.numeric(label)) > 50),size=.75) +
  geom_tippoint(aes(color=host_site), alpha=0.8)  +
  scale_colour_manual(values=get_color_palette(cladeA_tree$tip.label)) + 
  geom_tiplab() +
  xlim(NA,.5)

cladeB_MCRA <- ape::getMRCA(Bxy_tree,c('P17.D4','GCA.003464445.1.ASM346444v1'))
cladeB_tree <- extract.clade(Bxy_tree,cladeB_MCRA)
ggtree(cladeB_tree) %<+% metadata +
  geom_nodepoint(aes(subset = suppressWarnings(as.numeric(label)) > 50),size=.75) +
  geom_tippoint(aes(color=host_site), alpha=0.8)  +
  scale_colour_manual(values=get_color_palette(cladeB_tree$tip.label)) + 
  geom_tiplab() +
  xlim(NA,.5)

cladeC_MCRA <- ape::getMRCA(Bxy_tree,c('P21.4E','GCA.009102685.1.ASM910268v1'))
cladeC_tree <- extract.clade(Bxy_tree,cladeC_MCRA)
ggtree(cladeC_tree) %<+% metadata +
  geom_nodepoint(aes(subset = suppressWarnings(as.numeric(label)) > 50),size=.75) +
  geom_tippoint(aes(color=host_site), alpha=0.8)  +
  scale_colour_manual(values=get_color_palette(cladeC_tree$tip.label)) + 
  geom_tiplab() +
  xlim(NA,.5)

Bxy_metadata <- Bxy_metadata %>% mutate(clade = ifelse(
  isolate %in% cladeA_tree$tip.label,'cladeA',
    ifelse(isolate %in% cladeB_tree$tip.label,'cladeB',
      ifelse(isolate %in% cladeC_tree$tip.label,'cladeC','unassigned'))))

Captive clade assignment

captive1Node  <- ape::getMRCA(Bxy_tree,c('P17.D4','P13.A9'))
captive1tree <- extract.clade(Bxy_tree,captive1Node)
captive2Node  <- ape::getMRCA(Bxy_tree,c('P21.4E','P21.11C'))
captive2tree <- extract.clade(Bxy_tree,captive2Node)
captive3Node  <- ape::getMRCA(Bxy_tree,c('P21.6E','P21.2G'))
captive3tree <- extract.clade(Bxy_tree,captive3Node)


Bxy_tree_plot_cladelabels <- ggtree(Bxy_tree) %<+% 
  Bxy_metadata +
  geom_nodepoint(aes(subset = suppressWarnings(as.numeric(label)) > 50),size=.75) +
  geom_tippoint(aes(color=host_site), alpha=0.8)  + 
  scale_colour_manual(values=get_color_palette(Bxy_tree$tip.label)) + 
  xlim(NA,.5)+ 
  geom_cladelabel(node=captive1Node, color='black', offset=.01,
                  label="Mixed-host captive clade") + 
  geom_cladelabel(node=captive2Node, color='black', offset=.01,label="Captive gorilla clade2") +
  geom_cladelabel(node=captive3Node, color='black', offset=.01,label="Captive gorilla clade1") +
   geom_cladelabel(node=cladeA_MCRA, color='black', offset=.02,label="CladeA") + 
  geom_cladelabel(node=cladeB_MCRA, color='black', offset=.22,label="CladeB") +
  geom_cladelabel(node=cladeC_MCRA, color='black', offset=.2,label="CladeC") +
  xlim(NA,.5)
## Scale for 'x' is already present. Adding another scale for 'x', which will
## replace the existing scale.
Bxy_tree_plot_cladelabels

ggsave(Bxy_tree_plot_cladelabels,
       file=file.path(dir,'analyses/Bxy_phylogeny_cladeLabels.pdf'),height=15,width=8)
Bxy_metadata <- Bxy_metadata %>% mutate(captive_clade = ifelse(
  isolate %in% captive1tree$tip.label,'mixed.hostB',
    ifelse(isolate %in% captive2tree$tip.label,'gorillaB',
      ifelse(isolate %in% captive3tree$tip.label,'gorillaC','unassigned'))))

Cluster isolate genomes

#aln <- read.alignment(file.path(dir,'roary/core_gene_alignment.aln'),format='fasta')
#alnd <- as.matrix(dist.alignment(aln, matrix = "similarity",gap=0))
#colnames(alnd) <- str_replace_all(colnames(alnd),'[_-]','.')
#rownames(alnd) <- str_replace_all(rownames(alnd),'[_-]','.')
#alnd <- alnd[Bxy_metadata$isolate,Bxy_metadata$isolate]
#write.table(alnd,file = file.path(dir,'analyses/core_gene_alignment_dist.txt'),sep='\t')
Read in dist matrix of core alignment
alnd <- read.table(file = file.path(dir,'analyses/core_gene_alignment_dist.txt'),sep='\t')
hc <- hclust(dist(alnd))

cluster99 <- cutree(hc,h=.01)
cluster99 <- as.data.frame(cluster99) %>% rownames_to_column(var='isolate')

cluster999 <- cutree(hc,h=.001)
cluster999 <- as.data.frame(cluster999) %>% rownames_to_column(var='isolate')

Bxy_metadata <- Bxy_metadata %>% left_join(cluster99,by='isolate') %>% left_join(cluster999,by='isolate')

Write metadata with clade and cluster assignments

write.table(Bxy_metadata,file=file.path(dir,'analyses/Bxy_metadata.txt'),quote=F,sep='\t')

Annotations adding eggNOG and dbCAN

dbCAN

Use online dbCAN metada server (http://bcb.unl.edu/dbCAN2/index.php) to annotate pangenome reference produced by roary. Split files to meet size requirements. Run against dbCAN, CAZy, PPR.

pan_genome_fasta <- read.fasta(file.path(dir,'roary/pan_genome_reference.fa'),whole.header = T)
nseqs = length(pan_genome_fasta)
half= nseqs/2
part1 = pan_genome_fasta[1:half] 
part2 = pan_genome_fasta[(half+1):nseqs] 
system(paste0('mkdir -pv ',file.path(dir,'analyses/annotation')))
seqinr::write.fasta(part1,names=names(part1),
                    file=file.path(dir,'analyses/annotation/pan_genome_reference_part1.fa'))
seqinr::write.fasta(part2,names=names(part2),
                    file=file.path(dir,'analyses/annotation/pan_genome_reference_part2.fa'))

Download results from dbCAN meta server

#wget http://bcb.unl.edu/dbCAN2/data/blast/20210319105041/overview.txt -O dbDCAN_part1.txt
#wget http://bcb.unl.edu/dbCAN2/data/blast/20210319111110/overview.txt -O dbDCAN_part2.txt
#system(paste0('mv dbDCAN_part1.txt ',file.path(dir,'analyses/annotation/dbDCAN_part1.txt')))
#system(paste0('mv dbDCAN_part2.txt ',file.path(dir,'analyses/annotation/dbDCAN_part2.txt')))
#read in DBCan
dbcan_p1 <- read.table(file.path(dir,'analyses/annotation/dbDCAN_part1.txt'),sep='\t',header=T)
dbcan_p2 <- read.table(file.path(dir,'analyses/annotation/dbDCAN_part2.txt'),sep='\t',header=T)
head(dbcan_p1) #dbCAN split fasta sequence header so have to add back the Gene ortholog
##            Gene.ID        HMMER Hotpep DIAMOND Signalp X
## 1 BALBAENG_00590_1  GH33(1-206)   GH33    GH33       N 3
## 2 BALBAENG_00617_1 GH76(33-367)   GH76    GH76 Y(1-35) 3
## 3 BALBAENG_00619_1  GH92(3-206)   GH92    GH92       N 3
## 4 BALBAENG_00625_1  GH92(1-429)   GH92    GH92       N 3
## 5 BALBAENG_00627_1 GH76(33-367)   GH76    GH76 Y(1-35) 3
## 6 BALBAENG_00628_1 GH76(14-329)   GH76    GH76       N 3
dbcan <- dbcan_p1 %>% 
  add_row(dbcan_p2) %>% #add part2 to end of part2
  mutate(Gene.ID=str_sub(Gene.ID, end=-3)) %>% #remove '_1' from end of GeneID
  mutate_all(as.character) 

header <- names(pan_genome_fasta) 
dbcan <- as.data.frame(header) %>%
  separate(header,sep=' ',into=c('Gene.ID','Gene')) %>%
  left_join(dbcan,by='Gene.ID') 
head(dbcan)
##          Gene.ID        Gene HMMER Hotpep DIAMOND Signalp    X
## 1 HFONCOPK_00001 group_13236  <NA>   <NA>    <NA>    <NA> <NA>
## 2 HFONCOPK_00002  group_9967  <NA>   <NA>    <NA>    <NA> <NA>
## 3 HFONCOPK_00003        degA  <NA>   <NA>    <NA>    <NA> <NA>
## 4 HFONCOPK_00004        rbsK  <NA>   <NA>    <NA>    <NA> <NA>
## 5 HFONCOPK_00005  group_2728  <NA>   <NA>    <NA>    <NA> <NA>
## 6 HFONCOPK_00006      susD_3  <NA>   <NA>    <NA>    <NA> <NA>
write.table(dbcan,file.path(dir,'analyses/annotation/dbDCAN_final.txt'),sep='\t',quote=F)

Read in presence absence table produced by roary

pres_abs <- read.csv(file.path(dir,'roary/gene_presence_absence.csv'))
colnames(pres_abs) <- str_replace_all(colnames(pres_abs),'[_-]','.')
setdiff(Bxy_metadata$isolate,colnames(pres_abs)) #any isolates missing?
## character(0)
isblank <- function(x) (ifelse(x=="",0,1))
pres_abs <- pres_abs %>% 
  select(Gene,Annotation,Bxy_metadata$isolate)  %>%
  mutate_at(vars(-Gene,-Annotation),isblank) 

Add Roary gene presence absence table to dbCAN data

dbcan_isolates <- dbcan %>% 
  left_join(pres_abs,by='Gene') %>% 
  filter(!is.na(DIAMOND)) %>% 
  mutate(consensus = ifelse(DIAMOND != 'N',DIAMOND, 
                            ifelse(HMMER != 'N',HMMER,Hotpep))) %>%
  separate(consensus,sep='[(_]',into = c('consensus'),extra = 'drop')
  
dbcan_isolates <- dbcan_isolates %>% 
  pivot_longer(cols=Bxy_metadata$isolate,
               names_to='isolate',values_to='present') %>%
  filter(present == 1) %>%
  group_by(isolate,consensus) %>%
  tally() 

summary_of_CAZY_domains <- dbcan_isolates %>% 
  group_by(consensus) %>% 
  summarise(total = sum(n)) %>% 
  arrange(-total)
## `summarise()` ungrouping output (override with `.groups` argument)
HOST_table <- Bxy_metadata %>% 
  select(isolate,host_site) %>%
  column_to_rownames(var='isolate') 

CBM_table <- dbcan_isolates %>% 
  as.data.frame() %>% 
  dplyr::filter(stringr::str_starts(consensus, "CBM")) %>%
  pivot_wider(names_from=consensus,values_from=n,values_fill=0) %>%
  column_to_rownames(var='isolate') 
head(CBM_table)
##                             CBM0+CBM4+GH10 CBM13 CBM20+GH77 CBM32 CBM32+GH2
## GCA.002161115.1.ASM216111v1              1     1          1     2         2
## GCA.002161135.1.ASM216113v1              1     1          1     2         1
## GCA.002959635.1.ASM295963v1              0     0          1     2         1
## GCA.003436085.1.ASM343608v1              1     1          1     2         2
## GCA.003437545.1.ASM343754v1              0     1          1     1         2
## GCA.003458755.1.ASM345875v1              0     1          1     1         2
##                             CBM32+GH35 CBM35+GH27 CBM48+CE1+CE6 CBM48+GH13 CBM5
## GCA.002161115.1.ASM216111v1          2          1             1          1    3
## GCA.002161135.1.ASM216113v1          2          1             1          1    3
## GCA.002959635.1.ASM295963v1          2          1             1          1    2
## GCA.003436085.1.ASM343608v1          1          1             1          1    3
## GCA.003437545.1.ASM343754v1          2          1             1          1    3
## GCA.003458755.1.ASM345875v1          4          1             1          2    3
##                             CBM50 CBM50+GH23 CBM50+GH73 CBM57+GH137+GH2
## GCA.002161115.1.ASM216111v1     5          1          1               1
## GCA.002161135.1.ASM216113v1     5          1          1               1
## GCA.002959635.1.ASM295963v1     6          1          1               1
## GCA.003436085.1.ASM343608v1     5          1          1               1
## GCA.003437545.1.ASM343754v1     5          1          1               1
## GCA.003458755.1.ASM345875v1     5          1          1               1
##                             CBM57+GH2 CBM6 CBM6+GH43 CBM62 CBM67+GH0+GH78 CBM56
## GCA.002161115.1.ASM216111v1         2    1         2     1              1     0
## GCA.002161135.1.ASM216113v1         2    2         2     0              1     1
## GCA.002959635.1.ASM295963v1         2    2         6     1              0     1
## GCA.003436085.1.ASM343608v1         2    1         2     0              1     0
## GCA.003437545.1.ASM343754v1         2    0         5     0              0     1
## GCA.003458755.1.ASM345875v1         5    2         2     3              0     0
##                             CBM32+GH43 CBM35 CBM35+GH98 CBM4+GH10 CBM57+CBM6
## GCA.002161115.1.ASM216111v1          0     0          0         0          0
## GCA.002161135.1.ASM216113v1          0     0          0         0          0
## GCA.002959635.1.ASM295963v1          1     2          1         1          0
## GCA.003436085.1.ASM343608v1          0     0          0         0          0
## GCA.003437545.1.ASM343754v1          0     0          1         0          1
## GCA.003458755.1.ASM345875v1          0     0          2         0          0
##                             CBM67+GH78 CBM66 CBM2 CBM61+GH53 CBM0
## GCA.002161115.1.ASM216111v1          0     0    0          0    0
## GCA.002161135.1.ASM216113v1          0     0    0          0    0
## GCA.002959635.1.ASM295963v1          0     0    0          0    0
## GCA.003436085.1.ASM343608v1          0     0    0          0    0
## GCA.003437545.1.ASM343754v1          1     0    0          0    0
## GCA.003458755.1.ASM345875v1          1     0    0          0    0
gheatmap(ggtree(Bxy_tree),CBM_table, width=.3,
         colnames_angle=90, colnames_offset_y = .25) +
    scale_fill_viridis_c(option="A", name="continuous\nvalue")
## Scale for 'fill' is already present. Adding another scale for 'fill', which
## will replace the existing scale.

CAZY_table <- dbcan_isolates %>% 
  as.data.frame() %>% 
  pivot_wider(names_from=consensus,values_from=n,values_fill=0) %>%
  column_to_rownames(var='isolate') 
hc <- hclust(dist(t(CAZY_table)))
plot(hc, hang = -1, cex = 0.6)

cluster <- cutree(hc,k=10)
cazy <- names(c)
df_cazy <- data.frame(cbind(cazy,cluster)) %>% rownames_to_column(var='cazy')

cz<- c('GH2','GT2','GT4')
(cz <- df_cazy %>% filter(cluster>1) %>% pull(cazy))
##  [1] "CBM50"     "CBM6+GH43" "CE1"       "GH0"       "GH10"      "GH105"    
##  [7] "GH109"     "GH115"     "GH117"     "GH13"      "GH130"     "GH16"     
## [13] "GH18"      "GH2"       "GH20"      "GH28"      "GH29"      "GH3"      
## [19] "GH31"      "GH33"      "GH36"      "GH43"      "GH5"       "GH50"     
## [25] "GH51"      "GH76"      "GH78"      "GH88"      "GH92"      "GH95"     
## [31] "GH97"      "GT0"       "GT2"       "GT4"       "GT51"      "PL1"
cazy_sd <- apply(t(CAZY_table), 1, sd, na.rm=TRUE)
cz <- names(sort(cazy_sd,decreasing = TRUE)[1:10])


select_CAZY <- CAZY_table %>% select(cz)
## Note: Using an external vector in selections is ambiguous.
## ℹ Use `all_of(cz)` instead of `cz` to silence this message.
## ℹ See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
## This message is displayed once per session.
p <- gheatmap(ggtree(Bxy_tree) + ylim(-10,NA),
              HOST_table,width=.3,colnames_angle=90,hjust=1)  + scale_fill_manual(values=get_color_palette(Bxy_tree$tip.label)) 
## Scale for 'fill' is already present. Adding another scale for 'fill', which
## will replace the existing scale.
library(ggnewscale)
p <- p + new_scale_fill()
p2 <- gheatmap(p,select_CAZY,offset=.1,colnames_angle=90,hjust=1) + scale_fill_viridis_c(option="C")
## Scale for 'fill' is already present. Adding another scale for 'fill', which
## will replace the existing scale.
p2

ggsave(p2,file=file.path(dir,'analyses/cazy_heatmap.pdf'),height=6)
## Saving 7 x 6 in image

Annalyzing results of eggnog mapper

eggnog <- readr::read_tsv(file.path(dir,'eggnog_mapper/pan_genome_reference.emapper.annotations'),comment = '##')
## 
## ── Column specification ────────────────────────────────────────────────────────
## cols(
##   .default = col_character(),
##   evalue = col_double(),
##   score = col_double()
## )
## ℹ Use `spec()` for the full column specifications.
eggnog <- eggnog %>% rename('query'='#query') %>% select(query,best_OG_cat,best_OG_name,best_OG_desc)
table(eggnog$best_OG_cat)
## 
##       -       A       B       C      CE      CG      CH      CO      CP       D 
##    4365       1       1     520       1       6      14      60      11     284 
##      DJ      DK      DM      DT      DZ       E      EF      EG     EGP      EH 
##       5       2      26       1      29     647      11      46      36      22 
##  EH / C EH / EH      EJ      EL      EM      EP      EQ      ET      EU       F 
##       2       1       8       1       8       2       1      10       9     309 
##      FG      FJ      FK      FP      FT       G   G / P   G / S      GK     GKT 
##       3       9       3       4       1    1846       1       2      14       3 
##      GM      GT       H      HJ     HJM      HK      HP      HQ       I      IM 
##     121       1     637       7       1       1      11       1     207       5 
##      IQ      IU       J     JKL      JM       K      KL     KLT KLT / T      KO 
##      70       1     401      16      26    1053      14      22       1       1 
##      KT     KTU       L   L / J   L / V      LO      LT      LU       M   M / G 
##      85       1    2510       1       1       2       5       5    1718       1 
##   M / O   M / S     MNU     MOT      MP      MU  MU / H       N     NOU      NT 
##       2       1      14       2       7      59       1     108       1       2 
##      NU       O   O / O      OU       P      PT       Q   Q / -       S       T 
##      25     314       1      20    1284     125      82       2    6025     682 
##   T / -       U      UW       V 
##       2     587       7     492
header <- names(pan_genome_fasta) 
Bov <- Bxy_metadata$isolate[Bxy_metadata$taxonomy_Species!="Bacteroides_xylanisolvens"]
Bxy <- Bxy_metadata$isolate[Bxy_metadata$taxonomy_Species=="Bacteroides_xylanisolvens"]
df <- as.data.frame(header) %>%
  separate(header,sep=' ',into=c('Gene.ID','Gene')) %>%
  left_join(eggnog, by = c('Gene.ID' = 'query')) %>%
  left_join(pres_abs,by='Gene') %>%
  select(-Bov) 
## Note: Using an external vector in selections is ambiguous.
## ℹ Use `all_of(Bov)` instead of `Bov` to silence this message.
## ℹ See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
## This message is displayed once per session.
head(df)
##          Gene.ID        Gene best_OG_cat            best_OG_name
## 1 HFONCOPK_00001 group_13236        <NA>                    <NA>
## 2 HFONCOPK_00002  group_9967        <NA>                    <NA>
## 3 HFONCOPK_00003        degA           K 4NM06@976|Bacteroidetes
## 4 HFONCOPK_00004        rbsK           H 4NENQ@976|Bacteroidetes
## 5 HFONCOPK_00005  group_2728           P 4NDXS@976|Bacteroidetes
## 6 HFONCOPK_00006      susD_3           M 4NE4Y@976|Bacteroidetes
##                                                                                                                                                                                                                                                       best_OG_desc
## 1                                                                                                                                                                                                                                                             <NA>
## 2                                                                                                                                                                                                                                                             <NA>
## 3                                                                                                                                                                                                                       helix_turn _helix lactose operon repressor
## 4 Catalyzes the phosphorylation of ribose at O-5 in a reaction requiring ATP and magnesium. The resulting D-ribose-5- phosphate can then be used either for sythesis of nucleotides, histidine, and tryptophan, or as a component of the pentose phosphate pathway
## 5                                                                                                                                                                                                             TonB-linked outer membrane protein, SusC RagA family
## 6                                                                                                                                                                                                                                                       RagB, SusD
##                                Annotation P17.C2 P17.A3 P17.D4 P17.D5 P19.A7
## 1                    hypothetical protein      0      0      0      0      0
## 2                    hypothetical protein      0      0      0      0      0
## 3 HTH-type transcriptional regulator DegA      0      0      0      0      0
## 4                              Ribokinase      0      0      0      0      0
## 5            TonB-dependent receptor SusC      0      0      0      0      0
## 6                    hypothetical protein      0      0      0      0      0
##   P19.G7 P19.D7.or.C7 P19.10A P19.10B P19.10E P20.6B P20.6G P21.2G P21.4E
## 1      0            0       0       0       0      0      0      0      0
## 2      0            0       0       0       0      0      0      0      0
## 3      0            0       0       0       0      0      0      0      0
## 4      0            0       0       0       0      0      0      0      0
## 5      0            0       0       0       0      0      0      0      0
## 6      0            0       0       0       0      0      0      0      0
##   P21.4F P21.4G P21.6D P21.6E P21.6G P21.11A P21.11C P13.H2 P13.F7 P13.A9
## 1      0      0      0      0      0       0       0      0      0      0
## 2      0      0      0      0      0       0       0      0      0      0
## 3      0      0      0      0      0       0       0      0      0      0
## 4      0      0      0      0      0       0       0      0      0      0
## 5      0      0      0      0      0       0       0      0      0      0
## 6      0      0      0      0      0       0       0      0      0      0
##   P13.G9 P13.H9 P14.C1 P14.A2 P14.G2 P14.A4 P14.B4 P14.E4 P14.F5 P14.B8 P15.A10
## 1      0      0      0      0      0      0      0      0      0      0       0
## 2      0      0      0      0      0      0      0      0      0      0       0
## 3      0      0      0      0      0      0      0      0      0      0       0
## 4      0      0      0      0      0      0      0      0      0      0       0
## 5      0      0      0      0      0      0      0      0      0      0       0
## 6      0      0      0      0      0      0      0      0      0      0       0
##   P15.B10 P15.E10 P15.F10 P15.F11 GCA.015551805.1.ASM1555180v1
## 1       0       0       0       0                            0
## 2       0       0       0       0                            0
## 3       0       0       0       0                            1
## 4       0       0       0       0                            1
## 5       0       0       0       0                            1
## 6       0       0       0       0                            1
##   GCA.006546965.1.ASM654696v1 GCA.008710235.1.ASM871023v1
## 1                           0                           0
## 2                           0                           0
## 3                           1                           1
## 4                           1                           1
## 5                           1                           1
## 6                           1                           1
##   GCA.009102765.1.ASM910276v1 GCA.009102605.1.ASM910260v1
## 1                           0                           0
## 2                           0                           1
## 3                           1                           1
## 4                           1                           1
## 5                           1                           1
## 6                           1                           1
##   GCA.009102705.1.ASM910270v1 GCA.009102805.1.ASM910280v1
## 1                           0                           0
## 2                           0                           0
## 3                           1                           1
## 4                           1                           1
## 5                           1                           1
## 6                           1                           1
##   GCA.002161135.1.ASM216113v1 GCA.009102755.1.ASM910275v1
## 1                           0                           0
## 2                           0                           0
## 3                           0                           1
## 4                           0                           1
## 5                           0                           1
## 6                           0                           1
##   GCA.009102675.1.ASM910267v1 GCA.009102825.1.ASM910282v1
## 1                           0                           0
## 2                           0                           0
## 3                           1                           1
## 4                           1                           1
## 5                           1                           1
## 6                           1                           1
##   GCA.003469745.1.ASM346974v1 GCA.004167295.1.ASM416729v1
## 1                           0                           0
## 2                           0                           0
## 3                           0                           1
## 4                           0                           1
## 5                           0                           1
## 6                           0                           1
##   GCA.009093775.1.ASM909377v1 GCA.003436085.1.ASM343608v1
## 1                           0                           0
## 2                           0                           0
## 3                           1                           0
## 4                           1                           0
## 5                           1                           0
## 6                           1                           0
##   GCA.009102685.1.ASM910268v1 GCA.009102845.1.ASM910284v1
## 1                           0                           0
## 2                           0                           0
## 3                           1                           1
## 4                           1                           1
## 5                           1                           1
## 6                           1                           1
##   GCA.009102085.1.ASM910208v1 GCA.009102665.1.ASM910266v1
## 1                           0                           0
## 2                           0                           1
## 3                           0                           1
## 4                           0                           1
## 5                           0                           1
## 6                           0                           1
##   GCA.009101945.1.ASM910194v1 GCA.009102725.1.ASM910272v1
## 1                           0                           0
## 2                           0                           0
## 3                           1                           1
## 4                           1                           1
## 5                           1                           1
## 6                           1                           1
##   GCA.009148965.1.ASM914896v1 GCA.009095545.1.ASM909554v1
## 1                           0                           0
## 2                           0                           0
## 3                           1                           1
## 4                           1                           1
## 5                           1                           1
## 6                           1                           1
##   GCA.003437545.1.ASM343754v1 GCA.003474645.1.ASM347464v1
## 1                           0                           0
## 2                           0                           0
## 3                           1                           0
## 4                           1                           0
## 5                           1                           0
## 6                           1                           0
##   GCA.009102015.1.ASM910201v1 GCA.003474245.1.ASM347424v1
## 1                           0                           0
## 2                           0                           0
## 3                           1                           1
## 4                           1                           1
## 5                           1                           1
## 6                           1                           1
##   GCA.003468875.1.ASM346887v1 GCA.015553655.1.ASM1555365v1
## 1                           0                            0
## 2                           0                            0
## 3                           0                            1
## 4                           0                            1
## 5                           0                            1
## 6                           0                            1
##   GCA.009102045.1.ASM910204v1 GCA.009102025.1.ASM910202v1
## 1                           0                           0
## 2                           0                           0
## 3                           1                           1
## 4                           1                           1
## 5                           1                           1
## 6                           1                           1
##   GCA.015668785.1.ASM1566878v1 GCA.002161115.1.ASM216111v1
## 1                            0                           0
## 2                            0                           0
## 3                            1                           0
## 4                            1                           0
## 5                            1                           0
## 6                            1                           0
##   GCA.009102075.1.ASM910207v1 GCA.009093695.1.ASM909369v1
## 1                           0                           0
## 2                           0                           0
## 3                           1                           0
## 4                           1                           0
## 5                           1                           0
## 6                           1                           0
##   GCA.015559535.1.ASM1555953v1 GCA.009101535.1.ASM910153v1
## 1                            0                           0
## 2                            0                           0
## 3                            1                           1
## 4                            1                           1
## 5                            1                           1
## 6                            1                           1
##   GCA.015547545.1.ASM1554754v1 GCA.003464445.1.ASM346444v1
## 1                            0                           0
## 2                            0                           0
## 3                            1                           1
## 4                            1                           1
## 5                            1                           1
## 6                            1                           1
##   GCA.009101785.1.ASM910178v1 GCA.009101565.1.ASM910156v1
## 1                           0                           0
## 2                           0                           0
## 3                           1                           1
## 4                           1                           1
## 5                           1                           1
## 6                           1                           1
##   GCA.009102225.1.ASM910222v1 GCA.009102105.1.ASM910210v1
## 1                           0                           0
## 2                           0                           0
## 3                           1                           1
## 4                           1                           1
## 5                           1                           1
## 6                           1                           1
##   GCA.009101635.1.ASM910163v1 GCA.009102625.1.ASM910262v1
## 1                           0                           0
## 2                           0                           0
## 3                           1                           1
## 4                           1                           1
## 5                           1                           1
## 6                           1                           1
##   GCA.009101555.1.ASM910155v1 GCA.009153275.1.ASM915327v1
## 1                           0                           0
## 2                           0                           0
## 3                           1                           1
## 4                           1                           1
## 5                           1                           1
## 6                           1                           1
##   GCA.009102465.1.ASM910246v1 GCA.009102135.1.ASM910213v1
## 1                           0                           0
## 2                           0                           0
## 3                           1                           1
## 4                           1                           1
## 5                           1                           1
## 6                           1                           1
##   GCA.009102505.1.ASM910250v1 GCA.009093945.1.ASM909394v1
## 1                           0                           0
## 2                           0                           0
## 3                           1                           1
## 4                           1                           1
## 5                           1                           1
## 6                           1                           1
##   GCA.009102325.1.ASM910232v1 GCA.009101725.1.ASM910172v1
## 1                           0                           0
## 2                           0                           0
## 3                           1                           1
## 4                           1                           1
## 5                           1                           1
## 6                           1                           1
##   GCA.009103215.1.ASM910321v1 GCA.009102525.1.ASM910252v1
## 1                           0                           0
## 2                           0                           0
## 3                           1                           1
## 4                           1                           1
## 5                           1                           1
## 6                           1                           1
##   GCA.009102305.1.ASM910230v1 GCA.015556195.1.ASM1555619v1
## 1                           0                            0
## 2                           0                            0
## 3                           1                            1
## 4                           1                            1
## 5                           0                            1
## 6                           1                            1
##   GCA.009101615.1.ASM910161v1 GCA.009101775.1.ASM910177v1
## 1                           0                           0
## 2                           0                           0
## 3                           1                           1
## 4                           1                           1
## 5                           1                           1
## 6                           1                           1
##   GCA.009102485.1.ASM910248v1 GCA.009102185.1.ASM910218v1
## 1                           0                           0
## 2                           0                           0
## 3                           1                           0
## 4                           1                           1
## 5                           1                           1
## 6                           1                           1
##   GCA.009153235.1.ASM915323v1 GCA.009101745.1.ASM910174v1
## 1                           0                           0
## 2                           0                           0
## 3                           1                           1
## 4                           1                           1
## 5                           1                           1
## 6                           1                           1
##   GCA.009101985.1.ASM910198v1 GCA.009102215.1.ASM910221v1
## 1                           0                           0
## 2                           0                           0
## 3                           1                           1
## 4                           1                           1
## 5                           1                           1
## 6                           1                           1
##   GCA.009102385.1.ASM910238v1 GCA.009101685.1.ASM910168v1
## 1                           0                           0
## 2                           0                           0
## 3                           0                           1
## 4                           1                           1
## 5                           0                           1
## 6                           1                           1
##   GCA.009093835.1.ASM909383v1 GCA.009102545.1.ASM910254v1
## 1                           0                           0
## 2                           0                           0
## 3                           1                           1
## 4                           1                           1
## 5                           1                           1
## 6                           1                           1
##   GCA.009102165.1.ASM910216v1 GCA.009102245.1.ASM910224v1
## 1                           0                           0
## 2                           0                           0
## 3                           0                           1
## 4                           0                           1
## 5                           0                           1
## 6                           0                           1
##   GCA.009102285.1.ASM910228v1 GCA.009102785.1.ASM910278v1
## 1                           0                           0
## 2                           0                           0
## 3                           0                           1
## 4                           0                           1
## 5                           0                           1
## 6                           1                           1
##   GCA.009101505.1.ASM910150v1 GCA.009102145.1.ASM910214v1
## 1                           0                           0
## 2                           0                           0
## 3                           0                           1
## 4                           1                           1
## 5                           0                           0
## 6                           1                           1
##   GCA.009101625.1.ASM910162v1 GCA.009101665.1.ASM910166v1
## 1                           0                           0
## 2                           0                           0
## 3                           1                           1
## 4                           1                           1
## 5                           0                           1
## 6                           1                           1
##   GCA.009102575.1.ASM910257v1 GCA.009102265.1.ASM910226v1
## 1                           0                           0
## 2                           0                           0
## 3                           0                           0
## 4                           1                           1
## 5                           0                           0
## 6                           1                           1
##   GCA.009102375.1.ASM910237v1 GCA.009101705.1.ASM910170v1
## 1                           0                           0
## 2                           0                           0
## 3                           0                           0
## 4                           1                           0
## 5                           1                           0
## 6                           1                           1
##   GCA.009101965.1.ASM910196v1 GCA.009101645.1.ASM910164v1
## 1                           0                           0
## 2                           0                           0
## 3                           0                           0
## 4                           1                           0
## 5                           1                           0
## 6                           1                           1
##   GCA.003458755.1.ASM345875v1 GCA.003473975.1.ASM347397v1
## 1                           0                           0
## 2                           0                           0
## 3                           0                           1
## 4                           0                           1
## 5                           0                           1
## 6                           0                           1
df$total <- df %>% 
  select(Bxy) %>%
  mutate_all(as.numeric) %>% 
  rowSums() 
## Note: Using an external vector in selections is ambiguous.
## ℹ Use `all_of(Bxy)` instead of `Bxy` to silence this message.
## ℹ See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
## This message is displayed once per session.
df <- df %>% filter(total > 0) #remove genes not present after excluding Bov
core = length(Bxy)* .95 #set limits for core, shell, cloud genes 
shell = length(Bxy)* .15
df <- df %>% mutate(core_cat = if_else(total>core,'core',
                             if_else(total>shell,'shell','cloud')),
                    cog1 =str_sub(best_OG_cat, end=1),
                    best_OG_cat = replace_na(best_OG_cat,'-'))

df_sh <- df %>% select(-Bxy)

#average breakdown of gene cat 
df %>% select(core_cat,Bxy) %>%
  pivot_longer(Bxy, names_to = 'isolate') %>% 
  group_by(core_cat,isolate) %>% 
  summarise_all(sum) %>% 
  as.data.frame() %>%
  ggplot(aes(x=core_cat,y=value)) + 
  geom_boxplot(aes(fill=core_cat)) + theme_bw()

df2 <- df %>% select(core_cat,cog1,Bxy) %>%
  pivot_longer(Bxy, names_to = 'isolate') %>% 
  group_by(core_cat,cog1,isolate) %>% 
  summarise_all(sum) %>% 
  as.data.frame() 

ggplot(df2, aes(x=cog1,y=value)) + 
  geom_boxplot(aes(fill=cog1)) + theme_bw()

ggplot(df2, aes(x=cog1,y=value)) + 
  geom_boxplot(aes(fill=core_cat)) + theme_bw()

df3 <- df2 %>% 
  pivot_wider(names_from = core_cat, values_from = value, values_fill = 0) 
  
df4 <- df3 %>% select(-cog1) %>% 
  group_by(isolate) %>% 
  summarise_all(sum) %>%
  rename(cloud_total=cloud,shell_total=shell,core_total=core)

df5 <- df3 %>% 
  left_join(df4,by='isolate') %>%
  mutate(cloud_prop = cloud/cloud_total,
         shell_prop = shell/shell_total,
         core_prop = core/core_total) 

df6 <- df5 %>%
  select(cog1,isolate,cloud_prop,shell_prop,core_prop) %>%
  pivot_longer(cols =  c(cloud_prop,shell_prop,core_prop),names_to='core_cat')
  
ggplot(df6, aes(x=cog1,y=value)) + 
  geom_boxplot(aes(fill=core_cat)) + theme_bw()

core_vs_shell <- df6 %>% 
  group_by(cog1) %>%
  filter(core_cat %in% c('shell_prop','core_prop')) %>%
  rstatix::kruskal_test(value ~ core_cat) %>%
  mutate(comp='core_vs_shell')

cloud_vs_shell <- df6 %>% 
  group_by(cog1) %>%
  filter(core_cat %in% c('shell_prop','cloud_prop')) %>%
  rstatix::kruskal_test(value ~ core_cat) %>%
  mutate(comp='cloud_vs_shell')

cloud_vs_core <- df6 %>% 
  group_by(cog1) %>%
  filter(core_cat %in% c('core_prop','cloud_prop')) %>%
  rstatix::kruskal_test(value ~ core_cat) %>%
  mutate(comp='cloud_vs_core')

kw <- core_vs_shell %>% 
  add_row(cloud_vs_shell) %>% 
  add_row(cloud_vs_core) %>% 
  filter(p!='NaN') 
kw <- kw %>%
  mutate(p_adj = p * nrow(kw))
df <- as.data.frame(header) %>%
  separate(header,sep=' ',into=c('Gene.ID','Gene')) %>%
  left_join(eggnog, by = c('Gene.ID' = 'query')) %>%
  left_join(pres_abs,by='Gene') %>%
  select(-Bov) 

head(df)
##          Gene.ID        Gene best_OG_cat            best_OG_name
## 1 HFONCOPK_00001 group_13236        <NA>                    <NA>
## 2 HFONCOPK_00002  group_9967        <NA>                    <NA>
## 3 HFONCOPK_00003        degA           K 4NM06@976|Bacteroidetes
## 4 HFONCOPK_00004        rbsK           H 4NENQ@976|Bacteroidetes
## 5 HFONCOPK_00005  group_2728           P 4NDXS@976|Bacteroidetes
## 6 HFONCOPK_00006      susD_3           M 4NE4Y@976|Bacteroidetes
##                                                                                                                                                                                                                                                       best_OG_desc
## 1                                                                                                                                                                                                                                                             <NA>
## 2                                                                                                                                                                                                                                                             <NA>
## 3                                                                                                                                                                                                                       helix_turn _helix lactose operon repressor
## 4 Catalyzes the phosphorylation of ribose at O-5 in a reaction requiring ATP and magnesium. The resulting D-ribose-5- phosphate can then be used either for sythesis of nucleotides, histidine, and tryptophan, or as a component of the pentose phosphate pathway
## 5                                                                                                                                                                                                             TonB-linked outer membrane protein, SusC RagA family
## 6                                                                                                                                                                                                                                                       RagB, SusD
##                                Annotation P17.C2 P17.A3 P17.D4 P17.D5 P19.A7
## 1                    hypothetical protein      0      0      0      0      0
## 2                    hypothetical protein      0      0      0      0      0
## 3 HTH-type transcriptional regulator DegA      0      0      0      0      0
## 4                              Ribokinase      0      0      0      0      0
## 5            TonB-dependent receptor SusC      0      0      0      0      0
## 6                    hypothetical protein      0      0      0      0      0
##   P19.G7 P19.D7.or.C7 P19.10A P19.10B P19.10E P20.6B P20.6G P21.2G P21.4E
## 1      0            0       0       0       0      0      0      0      0
## 2      0            0       0       0       0      0      0      0      0
## 3      0            0       0       0       0      0      0      0      0
## 4      0            0       0       0       0      0      0      0      0
## 5      0            0       0       0       0      0      0      0      0
## 6      0            0       0       0       0      0      0      0      0
##   P21.4F P21.4G P21.6D P21.6E P21.6G P21.11A P21.11C P13.H2 P13.F7 P13.A9
## 1      0      0      0      0      0       0       0      0      0      0
## 2      0      0      0      0      0       0       0      0      0      0
## 3      0      0      0      0      0       0       0      0      0      0
## 4      0      0      0      0      0       0       0      0      0      0
## 5      0      0      0      0      0       0       0      0      0      0
## 6      0      0      0      0      0       0       0      0      0      0
##   P13.G9 P13.H9 P14.C1 P14.A2 P14.G2 P14.A4 P14.B4 P14.E4 P14.F5 P14.B8 P15.A10
## 1      0      0      0      0      0      0      0      0      0      0       0
## 2      0      0      0      0      0      0      0      0      0      0       0
## 3      0      0      0      0      0      0      0      0      0      0       0
## 4      0      0      0      0      0      0      0      0      0      0       0
## 5      0      0      0      0      0      0      0      0      0      0       0
## 6      0      0      0      0      0      0      0      0      0      0       0
##   P15.B10 P15.E10 P15.F10 P15.F11 GCA.015551805.1.ASM1555180v1
## 1       0       0       0       0                            0
## 2       0       0       0       0                            0
## 3       0       0       0       0                            1
## 4       0       0       0       0                            1
## 5       0       0       0       0                            1
## 6       0       0       0       0                            1
##   GCA.006546965.1.ASM654696v1 GCA.008710235.1.ASM871023v1
## 1                           0                           0
## 2                           0                           0
## 3                           1                           1
## 4                           1                           1
## 5                           1                           1
## 6                           1                           1
##   GCA.009102765.1.ASM910276v1 GCA.009102605.1.ASM910260v1
## 1                           0                           0
## 2                           0                           1
## 3                           1                           1
## 4                           1                           1
## 5                           1                           1
## 6                           1                           1
##   GCA.009102705.1.ASM910270v1 GCA.009102805.1.ASM910280v1
## 1                           0                           0
## 2                           0                           0
## 3                           1                           1
## 4                           1                           1
## 5                           1                           1
## 6                           1                           1
##   GCA.002161135.1.ASM216113v1 GCA.009102755.1.ASM910275v1
## 1                           0                           0
## 2                           0                           0
## 3                           0                           1
## 4                           0                           1
## 5                           0                           1
## 6                           0                           1
##   GCA.009102675.1.ASM910267v1 GCA.009102825.1.ASM910282v1
## 1                           0                           0
## 2                           0                           0
## 3                           1                           1
## 4                           1                           1
## 5                           1                           1
## 6                           1                           1
##   GCA.003469745.1.ASM346974v1 GCA.004167295.1.ASM416729v1
## 1                           0                           0
## 2                           0                           0
## 3                           0                           1
## 4                           0                           1
## 5                           0                           1
## 6                           0                           1
##   GCA.009093775.1.ASM909377v1 GCA.003436085.1.ASM343608v1
## 1                           0                           0
## 2                           0                           0
## 3                           1                           0
## 4                           1                           0
## 5                           1                           0
## 6                           1                           0
##   GCA.009102685.1.ASM910268v1 GCA.009102845.1.ASM910284v1
## 1                           0                           0
## 2                           0                           0
## 3                           1                           1
## 4                           1                           1
## 5                           1                           1
## 6                           1                           1
##   GCA.009102085.1.ASM910208v1 GCA.009102665.1.ASM910266v1
## 1                           0                           0
## 2                           0                           1
## 3                           0                           1
## 4                           0                           1
## 5                           0                           1
## 6                           0                           1
##   GCA.009101945.1.ASM910194v1 GCA.009102725.1.ASM910272v1
## 1                           0                           0
## 2                           0                           0
## 3                           1                           1
## 4                           1                           1
## 5                           1                           1
## 6                           1                           1
##   GCA.009148965.1.ASM914896v1 GCA.009095545.1.ASM909554v1
## 1                           0                           0
## 2                           0                           0
## 3                           1                           1
## 4                           1                           1
## 5                           1                           1
## 6                           1                           1
##   GCA.003437545.1.ASM343754v1 GCA.003474645.1.ASM347464v1
## 1                           0                           0
## 2                           0                           0
## 3                           1                           0
## 4                           1                           0
## 5                           1                           0
## 6                           1                           0
##   GCA.009102015.1.ASM910201v1 GCA.003474245.1.ASM347424v1
## 1                           0                           0
## 2                           0                           0
## 3                           1                           1
## 4                           1                           1
## 5                           1                           1
## 6                           1                           1
##   GCA.003468875.1.ASM346887v1 GCA.015553655.1.ASM1555365v1
## 1                           0                            0
## 2                           0                            0
## 3                           0                            1
## 4                           0                            1
## 5                           0                            1
## 6                           0                            1
##   GCA.009102045.1.ASM910204v1 GCA.009102025.1.ASM910202v1
## 1                           0                           0
## 2                           0                           0
## 3                           1                           1
## 4                           1                           1
## 5                           1                           1
## 6                           1                           1
##   GCA.015668785.1.ASM1566878v1 GCA.002161115.1.ASM216111v1
## 1                            0                           0
## 2                            0                           0
## 3                            1                           0
## 4                            1                           0
## 5                            1                           0
## 6                            1                           0
##   GCA.009102075.1.ASM910207v1 GCA.009093695.1.ASM909369v1
## 1                           0                           0
## 2                           0                           0
## 3                           1                           0
## 4                           1                           0
## 5                           1                           0
## 6                           1                           0
##   GCA.015559535.1.ASM1555953v1 GCA.009101535.1.ASM910153v1
## 1                            0                           0
## 2                            0                           0
## 3                            1                           1
## 4                            1                           1
## 5                            1                           1
## 6                            1                           1
##   GCA.015547545.1.ASM1554754v1 GCA.003464445.1.ASM346444v1
## 1                            0                           0
## 2                            0                           0
## 3                            1                           1
## 4                            1                           1
## 5                            1                           1
## 6                            1                           1
##   GCA.009101785.1.ASM910178v1 GCA.009101565.1.ASM910156v1
## 1                           0                           0
## 2                           0                           0
## 3                           1                           1
## 4                           1                           1
## 5                           1                           1
## 6                           1                           1
##   GCA.009102225.1.ASM910222v1 GCA.009102105.1.ASM910210v1
## 1                           0                           0
## 2                           0                           0
## 3                           1                           1
## 4                           1                           1
## 5                           1                           1
## 6                           1                           1
##   GCA.009101635.1.ASM910163v1 GCA.009102625.1.ASM910262v1
## 1                           0                           0
## 2                           0                           0
## 3                           1                           1
## 4                           1                           1
## 5                           1                           1
## 6                           1                           1
##   GCA.009101555.1.ASM910155v1 GCA.009153275.1.ASM915327v1
## 1                           0                           0
## 2                           0                           0
## 3                           1                           1
## 4                           1                           1
## 5                           1                           1
## 6                           1                           1
##   GCA.009102465.1.ASM910246v1 GCA.009102135.1.ASM910213v1
## 1                           0                           0
## 2                           0                           0
## 3                           1                           1
## 4                           1                           1
## 5                           1                           1
## 6                           1                           1
##   GCA.009102505.1.ASM910250v1 GCA.009093945.1.ASM909394v1
## 1                           0                           0
## 2                           0                           0
## 3                           1                           1
## 4                           1                           1
## 5                           1                           1
## 6                           1                           1
##   GCA.009102325.1.ASM910232v1 GCA.009101725.1.ASM910172v1
## 1                           0                           0
## 2                           0                           0
## 3                           1                           1
## 4                           1                           1
## 5                           1                           1
## 6                           1                           1
##   GCA.009103215.1.ASM910321v1 GCA.009102525.1.ASM910252v1
## 1                           0                           0
## 2                           0                           0
## 3                           1                           1
## 4                           1                           1
## 5                           1                           1
## 6                           1                           1
##   GCA.009102305.1.ASM910230v1 GCA.015556195.1.ASM1555619v1
## 1                           0                            0
## 2                           0                            0
## 3                           1                            1
## 4                           1                            1
## 5                           0                            1
## 6                           1                            1
##   GCA.009101615.1.ASM910161v1 GCA.009101775.1.ASM910177v1
## 1                           0                           0
## 2                           0                           0
## 3                           1                           1
## 4                           1                           1
## 5                           1                           1
## 6                           1                           1
##   GCA.009102485.1.ASM910248v1 GCA.009102185.1.ASM910218v1
## 1                           0                           0
## 2                           0                           0
## 3                           1                           0
## 4                           1                           1
## 5                           1                           1
## 6                           1                           1
##   GCA.009153235.1.ASM915323v1 GCA.009101745.1.ASM910174v1
## 1                           0                           0
## 2                           0                           0
## 3                           1                           1
## 4                           1                           1
## 5                           1                           1
## 6                           1                           1
##   GCA.009101985.1.ASM910198v1 GCA.009102215.1.ASM910221v1
## 1                           0                           0
## 2                           0                           0
## 3                           1                           1
## 4                           1                           1
## 5                           1                           1
## 6                           1                           1
##   GCA.009102385.1.ASM910238v1 GCA.009101685.1.ASM910168v1
## 1                           0                           0
## 2                           0                           0
## 3                           0                           1
## 4                           1                           1
## 5                           0                           1
## 6                           1                           1
##   GCA.009093835.1.ASM909383v1 GCA.009102545.1.ASM910254v1
## 1                           0                           0
## 2                           0                           0
## 3                           1                           1
## 4                           1                           1
## 5                           1                           1
## 6                           1                           1
##   GCA.009102165.1.ASM910216v1 GCA.009102245.1.ASM910224v1
## 1                           0                           0
## 2                           0                           0
## 3                           0                           1
## 4                           0                           1
## 5                           0                           1
## 6                           0                           1
##   GCA.009102285.1.ASM910228v1 GCA.009102785.1.ASM910278v1
## 1                           0                           0
## 2                           0                           0
## 3                           0                           1
## 4                           0                           1
## 5                           0                           1
## 6                           1                           1
##   GCA.009101505.1.ASM910150v1 GCA.009102145.1.ASM910214v1
## 1                           0                           0
## 2                           0                           0
## 3                           0                           1
## 4                           1                           1
## 5                           0                           0
## 6                           1                           1
##   GCA.009101625.1.ASM910162v1 GCA.009101665.1.ASM910166v1
## 1                           0                           0
## 2                           0                           0
## 3                           1                           1
## 4                           1                           1
## 5                           0                           1
## 6                           1                           1
##   GCA.009102575.1.ASM910257v1 GCA.009102265.1.ASM910226v1
## 1                           0                           0
## 2                           0                           0
## 3                           0                           0
## 4                           1                           1
## 5                           0                           0
## 6                           1                           1
##   GCA.009102375.1.ASM910237v1 GCA.009101705.1.ASM910170v1
## 1                           0                           0
## 2                           0                           0
## 3                           0                           0
## 4                           1                           0
## 5                           1                           0
## 6                           1                           1
##   GCA.009101965.1.ASM910196v1 GCA.009101645.1.ASM910164v1
## 1                           0                           0
## 2                           0                           0
## 3                           0                           0
## 4                           1                           0
## 5                           1                           0
## 6                           1                           1
##   GCA.003458755.1.ASM345875v1 GCA.003473975.1.ASM347397v1
## 1                           0                           0
## 2                           0                           0
## 3                           0                           1
## 4                           0                           1
## 5                           0                           1
## 6                           0                           1
df$total <- df %>% 
  select(Bxy) %>%
  mutate_all(as.numeric) %>% 
  rowSums() 

df <- df %>% filter(total > 0) #remove genes not present after excluding Bov
core = length(Bxy)* .95 #set limits for core, shell, cloud genes 
shell = length(Bxy)* .15
df <- df %>% mutate(core_cat = if_else(total>core,'core','shell'),
                    cog1 =str_sub(best_OG_cat, end=1),
                    best_OG_cat = replace_na(best_OG_cat,'-'))

df_sh <- df %>% select(-Bxy)

#average breakdown of gene cat 
df %>% select(core_cat,Bxy) %>%
  pivot_longer(Bxy, names_to = 'isolate') %>% 
  group_by(core_cat,isolate) %>% 
  summarise_all(sum) %>% 
  as.data.frame() %>%
  ggplot(aes(x=core_cat,y=value)) + 
  geom_boxplot(aes(fill=core_cat)) + theme_bw()

df2 <- df %>% select(core_cat,cog1,Bxy) %>%
  pivot_longer(Bxy, names_to = 'isolate') %>% 
  group_by(core_cat,cog1,isolate) %>% 
  summarise_all(sum) %>% 
  as.data.frame() 

ggplot(df2, aes(x=cog1,y=value)) + 
  geom_boxplot(aes(fill=cog1)) + theme_bw()

ggplot(df2, aes(x=cog1,y=value)) + 
  geom_boxplot(aes(fill=core_cat)) + theme_bw()

df3 <- df2 %>% 
  pivot_wider(names_from = core_cat, values_from = value, values_fill = 0) 
  
df4 <- df3 %>% select(-cog1) %>% 
  group_by(isolate) %>% 
  summarise_all(sum) %>%
  rename(shell_total=shell,core_total=core)

df5 <- df3 %>% 
  left_join(df4,by='isolate') %>%
  mutate(shell_prop = shell/shell_total,
         core_prop = core/core_total) 

shell_cog_prop <- df5 %>%
  select(cog1,isolate,shell_prop,core_prop) %>%
  pivot_longer(cols =  c(shell_prop,core_prop),names_to='core_cat')
  
ggplot(shell_cog_prop, aes(x=cog1,y=value)) + 
  geom_boxplot(aes(fill=core_cat)) + theme_bw()

core_vs_shell <- shell_cog_prop %>% 
  group_by(cog1) %>%
  filter(core_cat %in% c('shell_prop','core_prop')) %>%
  rstatix::kruskal_test(value ~ core_cat) %>%
  mutate(comp='core_vs_shell')

cloud_vs_core <- cloud_vs_core %>%
  mutate(p_adj = p * nrow(kw))

Format data for count

Bxy_tree_count <- Bxy_tree
Bxy_tree_count <- makeNodeLabel(Bxy_tree_count)
write.tree(Bxy_tree_count,file=file.path(dir,'count/Bxy_count.tree'))

format_pres_abs_table <- function(pres_abs_table,output){
  
  pres_abs <- read.csv(pres_abs_table)
  colnames(pres_abs) <- str_replace_all(colnames(pres_abs),'[_-]','.')
  isblank <- function(x) (ifelse(x=="",0,1))
  pres_abs <- pres_abs %>% 
    select(Gene,Bxy_metadata$isolate)  %>%
    mutate_at(vars(-Gene),isblank)   
  write.table(pres_abs,file=paste0(output,'_table.txt'),
                      sep='\t',quote=FALSE,row.names = FALSE)
  pres_abs_no_orfans <- pres_abs %>% 
    mutate(sum = rowSums(across(where(is.numeric)))) %>% 
    filter(sum>1) %>% 
    select(-sum)
  dim(pres_abs_no_orfans)
  write.table(pres_abs_no_orfans,file=paste0(output,'_table_no_orfans.txt'),
              sep='\t',quote=FALSE,row.names = FALSE)
}

format_pres_abs_table(pres_abs_table=file.path(dir,'roary/gene_presence_absence.csv'),
                      output=file.path(dir,'count/roary')) 

Function to run count

runCount <- function(tree,data,output,gain) {
  system(paste('java -Xmx2048M -cp bin/Count/Count.jar ca.umontreal.iro.evolution.genecontent.AsymmetricWagner -gain ',
             gain,
             tree,
             data,'>',
             output,sep=' '))
  system(paste0("grep '# CHANGE' ",output," | sed 's/# //' > ",output,".CHANGE"))
  system(paste0("grep '# PRESENT' ",output," | sed 's/# //' > ",output,".PRESENT"))
  present <- read.table(paste0(output,".PRESENT"),sep='\t',header=TRUE)
  res <- present %>% 
    mutate(istip = if_else(node %in% metadata$isolate,'tip','node')) %>% 
    rstatix::kruskal_test(genes ~ istip)
  res$gain_penalty <- gain
  return(res)
}

#compare gain penalty 
gain2 <-runCount(tree=file.path(dir,'count/Bxy_count.tree'),
                 data=file.path(dir,'count/roary_table.txt'),
                 output=file.path(dir,'count/roary_table'),
                 gain=2)
gain2 <-runCount(tree=file.path(dir,'count/Bxy_count.tree'),
                 data=file.path(dir,'count/roary_table_no_orfans.txt'),
                 output=file.path(dir,'count/roary_table_no_orfans'),
                 gain=2)

Functions to visualize and compare count output between roary and orthofinder

count_output <- read.table(file.path(dir,'count/roary_table_no_orfans.CHANGE'),sep='\t',header=TRUE)
count_tree <- read.tree(file=file.path(dir,'count/Bxy_count.tree'))

#read in count output
count_output <- as.data.frame(count_output) %>% 
    select(-CHANGE) %>%
    rename(label=node) %>%
    select(label,family_gain,family_loss,gene_gain,gene_loss) %>%
    droplevels.data.frame()
  
#add count gene gain loss info to tree_object
tree_dat = count_tree %>% 
    treeio::as_tibble() %>% 
    left_join(count_output,by='label') %>%
    left_join(metadata,by=c('label'='isolate')) %>%
    as.treedata()

extract_subtree_treeio <- function(tree_S4,taxa) {
  MCRA <- ape::getMRCA(as.phylo(tree_S4),taxa)
  subtree <- extract.clade(as.phylo(tree_S4),MCRA)
  to_drop <- setdiff(as.phylo(tree_S4)$tip.label,subtree$tip.label)
  tree_S4_subtree  <- treeio::drop.tip(tree_S4,to_drop)
  return(tree_S4_subtree)
}
count_cladeA_tree <- extract_subtree_treeio(tree_dat,c('GCA.009102525.1.ASM910252v1','GCA.009093695.1.ASM909369v1'))

plot_family_gain <- function(tree,title,limit){
  ggtree(tree)  + 
      geom_text2(aes(x=branch, label=family_gain, subset=(as.numeric(family_gain)>limit|as.numeric(family_loss)>limit)),
             size =3, vjust=-.1, color='firebrick') +
      geom_text2(aes(x=branch, label=family_loss,subset=(as.numeric(family_gain)>limit|as.numeric(family_loss)>limit)),
             size=3, vjust=1, color='blue') + 
      geom_tippoint(aes(color=host_site), alpha=0.8) + 
      scale_colour_manual(values=get_color_palette(as.phylo(tree)$tip.label)) +
      geom_treescale() + ggtitle(title)
}
famgain <- plot_family_gain(count_cladeA_tree,'cladeA',100)
plot(famgain)

plot_gene_gain <- function(tree,title,limit){
  ggtree(tree)  + 
      geom_text2(aes(x=branch, label=gene_gain,subset=(as.numeric(gene_gain)>limit|as.numeric(gene_loss)>limit)),
             size =3, vjust=-.1, color='firebrick') +
      geom_text2(aes(x=branch, label=gene_loss,subset=(as.numeric(gene_gain)>limit|as.numeric(gene_loss)>limit)),
             size=3, vjust=1, color='blue') + 
      geom_tippoint(aes(color=host_site), alpha=0.8) + 
      scale_colour_manual(values=get_color_palette(as.phylo(tree)$tip.label)) + 
    geom_treescale() + ggtitle(title)
}
plot_gene_gain(count_cladeA_tree,'cladeA',100)

visualize_count <- function(count_output_file,tree_file,output_prefix){
  count_output = read.table(count_output_file,sep='\t',header=TRUE)
  count_tree = read.tree(file=tree_file)
  
  #read in count output
  count_output <- as.data.frame(count_output) %>% 
    select(-CHANGE) %>%
    rename(label=node) %>%
    select(label,family_gain,family_loss,gene_gain,gene_loss) %>%
    droplevels.data.frame()
  
  #add count gene gain loss info to tree_object
  tree_dat = count_tree %>% 
    treeio::as_tibble() %>% 
    left_join(count_output,by='label') %>%
    left_join(metadata,by=c('label'='isolate')) %>%
    as.treedata()
  
  count_cladeA_tree <- extract_subtree_treeio(tree_dat,c('GCA.009102525.1.ASM910252v1','GCA.009093695.1.ASM909369v1'))
  count_cladeA_tree_plot <- plot_family_gain(count_cladeA_tree,'cladeA',100)
  ggsave(count_cladeA_tree_plot,file=paste0(output_prefix,'_family_cladeA.pdf'),height=10)
  count_cladeA_tree_plot <- plot_gene_gain(count_cladeA_tree,'cladeA',100)
  print(count_cladeA_tree_plot)
  ggsave(count_cladeA_tree_plot,file=paste0(output_prefix,'_gene_cladeA.pdf'),height=10)
  
  count_cladeB_tree <- extract_subtree_treeio(tree_dat,c('P17.D4','GCA.003464445.1.ASM346444v1'))
  count_cladeB_tree_plot <- plot_family_gain(count_cladeB_tree,'cladeB',100)
  ggsave(count_cladeB_tree_plot,file=paste0(output_prefix,'_family_cladeB.pdf'),height=10)
  count_cladeB_tree_plot <- plot_gene_gain(count_cladeB_tree,'cladeB',100)
  print(count_cladeB_tree_plot)
  ggsave(count_cladeB_tree_plot,file=paste0(output_prefix,'_gene_cladeB.pdf'),height=10)
  
  count_cladeC_tree <- extract_subtree_treeio(tree_dat,c('P21.4E','GCA.009102685.1.ASM910268v1'))
  count_cladeC_tree_plot <- plot_family_gain(count_cladeC_tree,'cladeC',100)
  ggsave(count_cladeC_tree_plot,file=paste0(output_prefix,'_family_cladeC.pdf'),height=10)
  count_cladeC_tree_plot <- plot_gene_gain(count_cladeC_tree,'cladeC',100)
  print(count_cladeC_tree_plot)
  ggsave(count_cladeC_tree_plot,file=paste0(output_prefix,'_gene_cladeC.pdf'),height=10)
  
  count_captive1Node  <- extract_subtree_treeio(tree_dat,c('P17.D4','P13.A9'))
  count_captive1Node_plot <- plot_family_gain(count_captive1Node,'Mixed host captive clade',0)
  ggsave(count_captive1Node_plot,file=paste0(output_prefix,'_family_captive1.pdf'),height=10)
  count_captive1Node_plot <- plot_gene_gain(count_captive1Node,'Mixed host captive clade',0)
  print(count_captive1Node_plot)
  ggsave(count_captive1Node_plot,file=paste0(output_prefix,'_gene_captive1.pdf'),height=10)
  
  count_captive2Node  <- extract_subtree_treeio(tree_dat,c('P21.4E','P21.11C'))
  count_captive2Node_plot <- plot_family_gain(count_captive2Node,'Mixed host captive clade',0)
  ggsave(count_captive2Node_plot,file=paste0(output_prefix,'_family_captive2.pdf'),height=10)
  count_captive2Node_plot <- plot_gene_gain(count_captive2Node,'Mixed host captive clade',0)
  print(count_captive2Node_plot)
  ggsave(count_captive2Node_plot,file=paste0(output_prefix,'_gene_captive2.pdf'),height=10)
  
  count_captive3Node  <- extract_subtree_treeio(tree_dat,c('P21.6E','P21.2G'))
  count_captive3Node_plot <- plot_family_gain(count_captive3Node,'Mixed host captive clade',0)
  ggsave(count_captive3Node_plot,file=paste0(output_prefix,'_family_captive3.pdf'),height=10)
  count_captive3Node_plot <- plot_gene_gain(count_captive3Node,'Mixed host captive clade',0)
  print(count_captive3Node_plot)
  ggsave(count_captive3Node_plot,file=paste0(output_prefix,'_gene_captive3.pdf'),height=10)
}

visualize_count(count_output_file = file.path(dir,'count/roary_table.CHANGE'),
                tree_file = file.path(dir,'count/Bxy_count.tree'),
                output_prefix =file.path(dir,'count/roary_table'))
## Saving 7 x 10 in image
## Saving 7 x 10 in image
## Saving 7 x 10 in image

## Saving 7 x 10 in image
## Saving 7 x 10 in image

## Saving 7 x 10 in image
## Saving 7 x 10 in image

## Saving 7 x 10 in image
## Saving 7 x 10 in image

## Saving 7 x 10 in image
## Saving 7 x 10 in image

## Saving 7 x 10 in image

visualize_count(count_output_file = file.path(dir,'count/roary_table_no_orfans.CHANGE'),
                tree_file = file.path(dir,'count/Bxy_count.tree'),
                output_prefix =file.path(dir,'count/roary_table_no_orfans'))
## Saving 7 x 10 in image
## Saving 7 x 10 in image
## Saving 7 x 10 in image

## Saving 7 x 10 in image
## Saving 7 x 10 in image

## Saving 7 x 10 in image
## Saving 7 x 10 in image

## Saving 7 x 10 in image
## Saving 7 x 10 in image

## Saving 7 x 10 in image
## Saving 7 x 10 in image

## Saving 7 x 10 in image

Run count on orthofinder gene table

of_tab <- read_tsv(file=
          file.path(dir,'orthofinder/Results_Mar23/Orthogroups/Orthogroups.GeneCount.tsv'))
## 
## ── Column specification ────────────────────────────────────────────────────────
## cols(
##   .default = col_double(),
##   Orthogroup = col_character()
## )
## ℹ Use `spec()` for the full column specifications.
colnames(of_tab) <- str_replace_all(colnames(of_tab),'[-_]','.')
of_tab <- of_tab %>%
  mutate(sum = rowSums(select(of_tab, -Orthogroup) > 0)) 
of_tab <- of_tab %>% filter(sum>0)
write.table(of_tab,file=file.path(dir,'count/orthofinder_table.txt'),
            sep='\t',quote=FALSE,row.names = FALSE)
of_tab_no_orfans <- of_tab %>% filter(sum>1) #remove orfans
write.table(of_tab_no_orfans,file=file.path(dir,'count/orthofinder_table_no_orfans.txt'),
            sep='\t',quote=FALSE,row.names = FALSE)

runCount(tree=file.path(dir,'count/Bxy_count.tree'),
         data=file.path(dir,'count/orthofinder_table.txt'),
         output=file.path(dir,'count/orthofinder_table'),
         gain=2)
## # A tibble: 1 x 7
##   .y.       n statistic    df      p method         gain_penalty
##   <chr> <int>     <dbl> <int>  <dbl> <chr>                 <dbl>
## 1 genes   253      2.97     1 0.0848 Kruskal-Wallis            2
runCount(tree=file.path(dir,'count/Bxy_count.tree'),
         data=file.path(dir,'count/orthofinder_table_no_orfans.txt'),
         output=file.path(dir,'count/orthofinder_table_no_orfans'),
         gain=2)
## # A tibble: 1 x 7
##   .y.       n statistic    df      p method         gain_penalty
##   <chr> <int>     <dbl> <int>  <dbl> <chr>                 <dbl>
## 1 genes   253      2.97     1 0.0848 Kruskal-Wallis            2
visualize_count(count_output_file = file.path(dir,'count/orthofinder_table.CHANGE'),
                tree_file = file.path(dir,'count/Bxy_count.tree'),
                output_prefix =file.path(dir,'count/orthofinder_table'))
## Saving 7 x 10 in image
## Saving 7 x 10 in image
## Saving 7 x 10 in image

## Saving 7 x 10 in image
## Saving 7 x 10 in image

## Saving 7 x 10 in image
## Saving 7 x 10 in image

## Saving 7 x 10 in image
## Saving 7 x 10 in image

## Saving 7 x 10 in image
## Saving 7 x 10 in image

## Saving 7 x 10 in image

visualize_count(count_output_file = file.path(dir,'count/orthofinder_table_no_orfans.CHANGE'),
                tree_file = file.path(dir,'count/Bxy_count.tree'),
                output_prefix =file.path(dir,'count/orthofinder_table_no_orfans'))
## Saving 7 x 10 in image
## Saving 7 x 10 in image
## Saving 7 x 10 in image

## Saving 7 x 10 in image
## Saving 7 x 10 in image

## Saving 7 x 10 in image
## Saving 7 x 10 in image

## Saving 7 x 10 in image
## Saving 7 x 10 in image

## Saving 7 x 10 in image
## Saving 7 x 10 in image

## Saving 7 x 10 in image

format_pres_abs_table(pres_abs_table=file.path(dir,'roary_nosplitparalogs/gene_presence_absence.csv'),
                      output=file.path(dir,'count/roary_nsp')) 

runCount(tree=file.path(dir,'count/Bxy_count.tree'),
         data=file.path(dir,'count/roary_nsp_table.txt'),
         output=file.path(dir,'count/roary_nsp_table'),
         gain=2)
## # A tibble: 1 x 7
##   .y.       n statistic    df     p method         gain_penalty
##   <chr> <int>     <dbl> <int> <dbl> <chr>                 <dbl>
## 1 genes   253     0.104     1 0.747 Kruskal-Wallis            2
runCount(tree=file.path(dir,'count/Bxy_count.tree'),
         data=file.path(dir,'count/roary_nsp_table_no_orfans.txt'),
         output=file.path(dir,'count/roary_nsp_table_no_orfans'),
         gain=2)
## # A tibble: 1 x 7
##   .y.       n statistic    df     p method         gain_penalty
##   <chr> <int>     <dbl> <int> <dbl> <chr>                 <dbl>
## 1 genes   253      1.27     1 0.259 Kruskal-Wallis            2
visualize_count(count_output_file = file.path(dir,'count/roary_nsp_table.CHANGE'),
                tree_file = file.path(dir,'count/Bxy_count.tree'),
                output_prefix =file.path(dir,'count/roary_nsp_table'))
## Saving 7 x 10 in image
## Saving 7 x 10 in image
## Saving 7 x 10 in image

## Saving 7 x 10 in image
## Saving 7 x 10 in image

## Saving 7 x 10 in image
## Saving 7 x 10 in image

## Saving 7 x 10 in image
## Saving 7 x 10 in image

## Saving 7 x 10 in image
## Saving 7 x 10 in image

## Saving 7 x 10 in image

visualize_count(count_output_file = file.path(dir,'count/roary_nsp_table_no_orfans.CHANGE'),
                tree_file = file.path(dir,'count/Bxy_count.tree'),
                output_prefix =file.path(dir,'count/roary_nsp_table_no_orfans'))
## Saving 7 x 10 in image
## Saving 7 x 10 in image
## Saving 7 x 10 in image

## Saving 7 x 10 in image
## Saving 7 x 10 in image

## Saving 7 x 10 in image
## Saving 7 x 10 in image

## Saving 7 x 10 in image
## Saving 7 x 10 in image

## Saving 7 x 10 in image
## Saving 7 x 10 in image

## Saving 7 x 10 in image

Convergent gene gain